Correlation Heatmap
# In order to identify key predictors and relationships between variables, we create a correlation heatmap between numerical variables
# Keep only the numeric columns of `main`. vapply() always yields a logical
# mask, and drop = FALSE keeps a data frame even when a single column matches.
numeric_data <- main[, vapply(main, is.numeric, logical(1)), drop = FALSE]
# Correlation matrix (cor()'s default method) computed from the rows that
# have no missing value in any of the numeric columns
corr_matrix <- cor(numeric_data, use = "complete.obs")
# Draw the whole correlation matrix as a circle-style heatmap, without
# printing the coefficients inside the cells
ggcorrplot(corr = corr_matrix, method = "circle", lab = FALSE) +
  ggtitle("Correlation Heatmap") +
  theme_minimal()

# Keep only strong correlations: every entry with |r| < 0.5 is set to NA
filtered_corr <- corr_matrix
filtered_corr[abs(filtered_corr) < 0.5] <- NA
# Ordered (Var1, Var2) pairs to drop from the long-format table below.
# The matrix is symmetric, so each pair shows up in both orders: a pair listed
# here in both orders disappears completely, while a pair listed in one order
# only is still reported once, in the opposite order.
exclude_pairs <- list(
  c("worr_job_dummy", "worr_job_categorical"),
  c("worr_job_categorical", "worr_job_dummy"),
  c("worr_economic_dummy", "worr_economic_categorical"),
  c("worr_economic_categorical", "worr_economic_dummy"),
  c("worr_health_dummy", "worr_health_categorical"),
  c("worr_health_categorical", "worr_health_dummy"),
  c("bmi_categorical", "BMI"),
  c("BMI", "bmi_categorical"),
  c("BMI", "weight"),
  c("weight", "BMI"),
  c("germborn", "migback"),
  c("migback", "germborn"),
  c("worr_financial_dummy", "worr_financial_categorical"),
  c("worr_financial_categorical", "worr_financial_dummy"),
  c("age", "gebjahr"),
  c("gebjahr", "age"),
  c("bmi_categorical", "weight"),
  c("weight", "bmi_categorical"),
  c("health_satisfaction", "health"),
  c("work_time_weekly", "net_income"),
  c("syear", "pid"),
  c("pid", "syear"),
  c("weight", "height"),
  c("worr_health_categorical", "health"),
  c("work_time_weekly", "unemp_dummy"),
  c("life_satisfaction", "health_satisfaction"),
  c("worr_health_categorical", "health_satisfaction"),
  c("health_in_2yrs", "health"),
  c("male", "height"),
  c("health_in_2yrs", "health_satisfaction")
)
# One lookup key per excluded pair, built once outside filter(). The two names
# are joined with a tab rather than "_": the variable names themselves contain
# underscores, so "_"-joined keys could not tell where one name ends.
excluded_keys <- vapply(exclude_pairs, paste, character(1), collapse = "\t")
filtered_table <- as.data.frame(as.table(filtered_corr)) %>%
  filter(
    !is.na(Freq), # Drop the correlations blanked out above
    Var1 != Var2, # Drop the diagonal
    !(paste(Var1, Var2, sep = "\t") %in% excluded_keys) # Drop listed pairs
  ) %>%
  arrange(desc(abs(Freq))) # Strongest correlations first
# View the filtered table
filtered_table # Display the table
# Create a bar plot
# Display labels for the bar plot, keyed by the raw "Var1 & Var2" pair name
pair_labels <- c(
  "health & health_satisfaction" = "Health & Health Satisfaction",
  "height & male" = "Height & Gender (Male)",
  "net_income & work_time_weekly" = "Net Income & Work Time Weekly",
  "health & health_in_2yrs" = "Health & Health in 2 years",
  "height & weight" = "Height & Weight",
  "health_satisfaction & health_in_2yrs" =
    "Health Satisfaction & Health in 2 years",
  "health_satisfaction & life_satisfaction" =
    "Health Satisfaction & Life Satisfaction",
  "health_satisfaction & worr_health_categorical" =
    "Health Satisfaction & Worrying about Health (Categorical)",
  "health & worr_health_categorical" =
    "Health & Worrying about Health (Categorical)",
  "unemp_dummy & work_time_weekly" = "Unemployment (Dummy) & Work Time Weekly"
)
# Start from the raw pair name, then swap in the display label where one is
# defined; pairs without a label keep the raw "Var1 & Var2" text
filtered_table <- filtered_table %>%
  mutate(
    Variable_Pairs = paste(Var1, Var2, sep = " & "),
    Variable_Pairs = if_else(
      Variable_Pairs %in% names(pair_labels),
      unname(pair_labels[Variable_Pairs]),
      Variable_Pairs
    )
  )
# Horizontal bar chart of the retained correlations, ordered by their signed
# value and coloured by sign
ggplot(
  filtered_table,
  aes(x = reorder(Variable_Pairs, Freq), y = Freq, fill = Freq > 0)
) +
  geom_col(color = "black") +
  coord_flip() + # Flip coordinates for better readability
  labs(
    title = "Correlation Plot",
    subtitle = "Visualizing selected Correlations"
  ) +
  scale_fill_manual(
    # Red for negative, dark blue for positive. The colours are named by the
    # value of `Freq > 0` so that each sign keeps its colour even when only
    # one sign is present in the table.
    values = c("FALSE" = "#FF0000", "TRUE" = "#163E64"),
    guide = "none"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling: no axis titles, pair names in bold
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, color = "#1E2B4F"),
    axis.text.y = element_text(size = 12, face = "bold", color = "#1E2B4F"),
    # Title and subtitle styling
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Adjust overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend styling
    legend.position = "none"
  )

# Save graph
ggsave("correlation_plot.png", plot = last_plot(), width = 16, height = 8, dpi = 300)
Gender Disparities
# Basis for the per-gender figures: one row per (pid, male) combination, each
# health measure averaged over that combination's rows with NAs ignored
main_unique <- main %>%
  group_by(pid, male) %>%
  summarise(
    across(
      c(health, health_decline_2yrs, worr_health_dummy, health_satisfaction),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )
# Health by gender: violin plot of the averaged health score, with each
# group's mean drawn as a point and printed (two decimals) just above it
main_unique %>%
  ggplot(aes(x = factor(male), y = health, fill = factor(male))) +
  geom_violin(trim = FALSE, alpha = 0.7, colour = "black") +
  stat_summary(
    fun = "mean", geom = "point",
    shape = 16, size = 3, colour = "black"
  ) +
  geom_text(
    aes(label = round(after_stat(y), 2)),
    stat = "summary", fun = "mean", vjust = -0.5
  ) +
  # male == 0 is shown as Female (red), male == 1 as Male (dark blue)
  scale_x_discrete(labels = c("0" = "Female", "1" = "Male")) +
  scale_fill_manual(values = c("#FF0000", "#163E64")) +
  labs(
    x = "Gender",
    y = "Health",
    title = "Health Distribution by Gender",
    subtitle = "Violin Plot Showing Health Scores"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; titles and gender labels in bold
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # The x axis already names the groups, so no legend
    legend.position = "none"
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "gender_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# health_decline_2yrs by gender: one boxplot per gender
main_unique %>%
  ggplot(
    aes(x = factor(male), y = health_decline_2yrs, fill = factor(male))
  ) +
  # Semi-transparent boxes with solid round outlier points
  geom_boxplot(
    alpha = 0.7, colour = "black",
    outlier.shape = 16, outlier.size = 2
  ) +
  scale_x_discrete(labels = c("0" = "Female", "1" = "Male")) +
  scale_fill_manual(
    name = "Gender",
    values = c("#FF0000", "#163E64"),
    labels = c("Female", "Male")
  ) +
  labs(
    x = "Gender",
    y = "Health Decline",
    title = "Health Decline in 2 Years by Gender",
    subtitle = "Boxplot Comparison of Health Decline Across Genders"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; titles and gender labels in bold
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend hidden
    legend.position = "none"
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "gender_health_decline.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# worr_health_dummy by gender: one boxplot per gender
ggplot(main_unique, aes(x = factor(male), y = worr_health_dummy, fill = factor(male))) +
  # Semi-transparent boxes with solid round outlier points
  geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2, color = "black") +
  scale_fill_manual(
    values = c("#FF0000", "#163E64"),
    labels = c("Female", "Male"),
    name = "Gender"
  ) +
  scale_x_discrete(labels = c("0" = "Female", "1" = "Male")) + # Update x-axis labels
  labs(
    title = "Health Worries by Gender",
    subtitle = "Boxplot of Health Worries for Females and Males",
    x = "Gender",
    y = "Health Worries"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling: dark-blue text, titles and gender labels in bold
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", color = "#1E2B4F"),
    axis.text.y = element_text(size = 12, color = "#1E2B4F"),
    # Title and subtitle styling
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Adjust overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend styling: the documented keyword is lower-case "none"
    legend.position = "none"
  )

# Save graph
ggsave("gender_worr_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# health_satisfaction by gender: overlaid density curves, one per gender
ggplot(main_unique, aes(x = health_satisfaction, fill = factor(male), color = factor(male))) +
  # Semi-transparent fill; `linewidth` (not the deprecated `size`) sets the
  # outline thickness in ggplot2 >= 3.4.0
  geom_density(alpha = 0.5, linewidth = 1) +
  scale_fill_manual(
    values = c("#FF0000", "#163E64"),
    labels = c("Female", "Male"),
    name = "Gender"
  ) +
  scale_color_manual(
    values = c("#FF0000", "#163E64"),
    labels = c("Female", "Male"),
    name = "Gender"
  ) +
  labs(
    title = "Density of Health Satisfaction by Gender",
    subtitle = "Density Plot Comparing Female and Male Responses",
    x = "Health Satisfaction",
    y = "Density",
    fill = "Gender",
    color = "Gender"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling: dark-blue text, bold titles
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.text.x = element_text(size = 12, color = "#1E2B4F"),
    axis.text.y = element_text(size = 12, color = "#1E2B4F"),
    # Title and subtitle styling
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Adjust overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # NOTE(review): the legend is hidden, so the figure itself does not say
    # which colour is Female and which is Male - confirm this is intended
    legend.position = "none"
  )

# Save graph
ggsave("gender_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
We notice that gender differences on the health variables are largely
insignificant. Men display slightly better health, higher health satisfaction
and fewer worries about health, but these differences are rather small. We
therefore do not expect gender to be a strong predictor.
Educational Level Disparities
# Basis for the per-education figures: one row per (pid, educ) combination,
# each health measure averaged over that combination's rows with NAs ignored
main_educ <- main %>%
  group_by(pid, educ) %>%
  summarise(
    across(
      c(health, health_decline_2yrs, worr_health_dummy, health_satisfaction),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )
# Number of rows of main_educ per education level (rows with educ == 0 are
# left out); the axis always shows all eight levels, in order
main_educ %>%
  filter(educ != 0) %>%
  ggplot(aes(x = factor(educ), fill = factor(educ))) +
  geom_bar(alpha = 0.7, colour = "black") +
  scale_x_discrete(
    limits = as.character(1:8),
    labels = c(
      "1" = "Primary",
      "2" = "Lower Secondary",
      "3" = "Upper Secondary",
      "4" = "Post-Secondary",
      "5" = "Short-Cycle Tertiary",
      "6" = "Bachelor's",
      "7" = "Master's",
      "8" = "Doctoral"
    )
  ) +
  scale_fill_brewer(palette = "RdBu") +
  labs(
    x = "Educational Level",
    y = "Count",
    fill = "Educational Level",
    title = "Distribution of Individuals by Educational Level"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; level names tilted 45 degrees so they do not overlap
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # The x axis already names the levels, so no legend
    legend.position = "none"
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "educ.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Health per education level: one density panel per level (rows with
# educ == 0 are left out), laid out in two columns
ggplot(main_educ %>% filter(educ != 0), aes(x = health, fill = factor(educ))) +
  geom_density(alpha = 0.7, color = "black") +
  facet_wrap(
    ~ factor(educ),
    ncol = 2,
    # Replace the numeric level codes with readable panel titles
    labeller = as_labeller(c(
      "1" = "Primary",
      "2" = "Lower Secondary",
      "3" = "Upper Secondary",
      "4" = "Post-Secondary",
      "5" = "Short-Cycle Tertiary",
      "6" = "Bachelor's",
      "7" = "Master's",
      "8" = "Doctoral"
    ))
  ) +
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  labs(
    title = "Density of Health by Educational Level",
    x = "Health",
    y = "Density",
    fill = "Educational Level"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.text.x = element_text(size = 12, color = "#1E2B4F"),
    axis.text.y = element_blank(), # Hide the y-axis tick labels
    # Facet title styling
    strip.text = element_text(
      size = 12, face = "bold", color = "#1E2B4F",
      margin = ggplot2::margin(b = 5)
    ),
    # Title styling
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Adjust overall plot margins (last argument: no trailing comma, which
    # would leave an empty argument in the call)
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20)
  )

# Save graph
ggsave("educ_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# health_decline_2yrs by education level: one boxplot per level (rows with
# educ == 0 are left out)
main_educ %>%
  filter(educ != 0) %>%
  ggplot(
    aes(x = factor(educ), y = health_decline_2yrs, fill = factor(educ))
  ) +
  # Semi-transparent boxes with solid round outlier points
  geom_boxplot(
    alpha = 0.7, colour = "black",
    outlier.shape = 16, outlier.size = 2
  ) +
  scale_x_discrete(
    labels = c(
      "1" = "Primary",
      "2" = "Lower Secondary",
      "3" = "Upper Secondary",
      "4" = "Post-Secondary",
      "5" = "Short-Cycle Tertiary",
      "6" = "Bachelor's",
      "7" = "Master's",
      "8" = "Doctoral"
    )
  ) +
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  labs(
    x = "Educational Level",
    y = "Health Decline",
    fill = "Educational Level",
    title = "Health Decline in 2 Years by Educational Level",
    subtitle = "Boxplot of Health Decline Across Education Levels"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; level names tilted 45 degrees so they do not overlap
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and a slightly smaller subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 12, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend hidden
    legend.position = "none"
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "educ_health_decline.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# worr_health_dummy by education level: one boxplot per level (rows with
# educ == 0 are left out)
main_educ %>%
  filter(educ != 0) %>%
  ggplot(
    aes(x = factor(educ), y = worr_health_dummy, fill = factor(educ))
  ) +
  # Semi-transparent boxes with solid round outlier points
  geom_boxplot(
    alpha = 0.7, colour = "black",
    outlier.shape = 16, outlier.size = 2
  ) +
  scale_x_discrete(
    labels = c(
      "1" = "Primary",
      "2" = "Lower Secondary",
      "3" = "Upper Secondary",
      "4" = "Post-Secondary",
      "5" = "Short-Cycle Tertiary",
      "6" = "Bachelor's",
      "7" = "Master's",
      "8" = "Doctoral"
    )
  ) +
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  labs(
    x = "Educational Level",
    y = "Health Worries",
    fill = "Educational Level",
    title = "Health Worries by Educational Level",
    subtitle = "Boxplot of Health Worries Across Education Levels"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; level names tilted 45 degrees so they do not overlap
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and a slightly smaller subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 12, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend hidden
    legend.position = "none"
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "educ_worr_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# health_satisfaction by education level: one boxplot per level (rows with
# educ == 0 are left out)
main_educ %>%
  filter(educ != 0) %>%
  ggplot(
    aes(x = factor(educ), y = health_satisfaction, fill = factor(educ))
  ) +
  # Semi-transparent boxes with solid round outlier points
  geom_boxplot(
    alpha = 0.7, colour = "black",
    outlier.shape = 16, outlier.size = 2
  ) +
  scale_x_discrete(
    labels = c(
      "1" = "Primary",
      "2" = "Lower Secondary",
      "3" = "Upper Secondary",
      "4" = "Post-Secondary",
      "5" = "Short-Cycle Tertiary",
      "6" = "Bachelor's",
      "7" = "Master's",
      "8" = "Doctoral"
    )
  ) +
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  labs(
    x = "Educational Level",
    y = "Health Satisfaction",
    fill = "Educational Level",
    title = "Health Satisfaction by Educational Level",
    subtitle = "Boxplot of Health Satisfaction Across Education Levels"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; level names tilted 45 degrees so they do not overlap
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend hidden
    legend.position = "none"
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "educ_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
Our dataset mostly contains individuals with an upper secondary
education, with only a few at the primary-school or doctoral
level. We expect those two groups to have a high variance.
In all graphs, we notice the same pattern: positive health perception
and health satisfaction increase with education, while negative
health perception (worrying about health, health decline) decreases with
education. We expect education to be correlated with income: people with
higher education levels get higher salaries, so the two effects might be
linked.
Income Level Disparities
There are 3 individuals with an income over 40k; we drop them from the
visualisations because they skew the plots considerably.
# Mean net income per pid (NAs ignored), keeping only the pids whose mean is
# at most 40000
cleaned_main <- main %>%
  group_by(pid) %>%
  summarise(
    net_income = mean(net_income, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(net_income <= 40000)
# Histogram of those per-pid means, in bins 500 wide
cleaned_main %>%
  ggplot(aes(x = net_income)) +
  geom_histogram(
    binwidth = 500, alpha = 0.7,
    fill = "#163E64", colour = "black"
  ) +
  labs(
    x = "Net Income",
    y = "Count",
    title = "Histogram of Net Income Distribution",
    subtitle = "Distribution of Net Income Across Individuals"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text, bold axis titles
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred title (bold) and a slightly smaller subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 12, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20)
  )

# Write the most recent plot to disk at 300 dpi
ggsave(filename = "income.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Keep the rows with a net income of at most 40000 (the same cut-off as
# `cleaned_main`: only incomes over 40k are dropped), then average the health
# measures within each (pid, net_income) combination, ignoring NAs.
# NOTE(review): the cut-off is applied to individual rows here, whereas
# `cleaned_main` applies it to each pid's mean income, and grouping on the raw
# net_income value keeps one row per distinct income of a pid - confirm that
# both are intended.
income_data <- main %>%
  filter(net_income <= 40000) %>%
  group_by(pid, net_income) %>%
  summarize(
    health = mean(health, na.rm = TRUE),
    health_decline_2yrs = mean(health_decline_2yrs, na.rm = TRUE),
    worr_health_dummy = mean(worr_health_dummy, na.rm = TRUE),
    health_satisfaction = mean(health_satisfaction, na.rm = TRUE),
    .groups = "drop"
  )
# Health per income: mean health within 30 income bins (points) with a single
# LOESS trend line fitted to all rows
ggplot(income_data, aes(x = net_income, y = health)) +
  geom_smooth(
    aes(group = 1), # One trend line for the whole data set
    method = "loess",
    color = "#F00000", # Red trend line
    linewidth = 1.2, # `linewidth` replaces the deprecated `size` for lines
    se = FALSE, # No confidence interval shading
    span = 1
  ) +
  stat_summary_bin(fun = "mean", bins = 30, geom = "point", color = "#163E64", size = 2) +
  labs(
    title = "Health vs. Income (Binned Scatter Plot)",
    x = "Net Income",
    y = "Average Health"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling: dark-blue text, bold titles
    axis.text.x = element_text(size = 12, color = "#1E2B4F"),
    axis.text.y = element_text(size = 12, color = "#1E2B4F"),
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    # Title styling
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Adjust overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20)
  )
## `geom_smooth()` using formula = 'y ~ x'

ggsave("income_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
# Graph for health_decline_2yrs per income:
# Bin net income into fixed brackets of 2.5K. The break points are spelled out
# so that every label states the real bounds of its bin; with `breaks = 10`
# the edges would follow the observed income range and would only match these
# labels if that range happened to be 0 - 25K. right = FALSE makes each bin
# include its lower bound: "2.5K - 5K" is [2500, 5000), "> 22.5K" starts at
# 22500.
income_data <- income_data %>%
  mutate(
    net_income_bin = cut(
      net_income,
      breaks = c(-Inf, seq(2500, 22500, by = 2500), Inf),
      labels = c(
        "< 2.5K",
        "2.5K - 5K",
        "5K - 7.5K",
        "7.5K - 10K",
        "10K - 12.5K",
        "12.5K - 15K",
        "15K - 17.5K",
        "17.5K - 20K",
        "20K - 22.5K",
        "> 22.5K"
      ),
      right = FALSE
    )
  )
# health_decline_2yrs by income bracket: one boxplot per net_income_bin
income_data %>%
  ggplot(
    aes(x = net_income_bin, y = health_decline_2yrs, fill = net_income_bin)
  ) +
  # Semi-transparent boxes with solid round outlier points
  geom_boxplot(
    alpha = 0.7, colour = "black",
    outlier.shape = 16, outlier.size = 2
  ) +
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  labs(
    x = "Net Income (Binned)",
    y = "Health Decline",
    title = "Health Decline by Income (Binned)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # White canvas, no grid lines
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark-blue axis text; bracket names tilted 45 degrees so they do not overlap
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Centred bold title
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the panel, mostly on the right
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend hidden
    legend.position = "none"
  )

ggsave(filename = "income_health_decline.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# worr_health_dummy per income: overlaid density curves, one per income bin
ggplot(income_data, aes(x = worr_health_dummy, fill = net_income_bin)) +
  # Semi-transparent fill with a thin black outline; `linewidth` replaces the
  # deprecated `size` for line thickness
  geom_density(alpha = 0.5, color = "black", linewidth = 0.5) +
  scale_fill_brewer(palette = "RdBu", name = "Net Income (Binned)") +
  labs(
    title = "Density of Health Worries by Income",
    x = "Health Worries",
    y = "Density"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling: dark-blue text, bold titles
    axis.text.x = element_text(size = 12, color = "#1E2B4F"),
    axis.text.y = element_text(size = 12, color = "#1E2B4F"),
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    # Title styling
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Adjust overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend styling: shown to the right of the panel
    legend.position = "right",
    legend.text = element_text(size = 10, color = "#1E2B4F"),
    legend.title = element_text(size = 12, face = "bold", color = "#1E2B4F")
  )

ggsave("income_worr_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Group data by income bins and calculate average health satisfaction.
#
# The bin labels name fixed 2.5K thresholds, so the breaks are spelled out
# explicitly. `breaks = 10` would instead cut the *observed* income range into
# ten equal-width intervals, and the labels would only be correct if that range
# happened to be exactly 0-25K.
# Bins are left-closed, [lower, upper), so "2.5K - 5K" covers 2500 <= x < 5000;
# the outer bins are open-ended. Rows with a missing net_income end up in an
# NA bin, and bins without observations simply do not appear in the result.
heatmap_data <- income_data %>%
  mutate(net_income_bin = cut(
    net_income,
    breaks = c(-Inf, seq(2500, 22500, by = 2500), Inf),
    right = FALSE,
    labels = c(
      "< 2.5K",
      "2.5K - 5K",
      "5K - 7.5K",
      "7.5K - 10K",
      "10K - 12.5K",
      "12.5K - 15K",
      "15K - 17.5K",
      "17.5K - 20K",
      "20K - 22.5K",
      "> 22.5K"
    )
  )) %>%
  group_by(net_income_bin) %>%
  summarize(
    avg_health_satisfaction = mean(health_satisfaction, na.rm = TRUE),
    .groups = "drop"
  )
# Create the heatmap: a single row of tiles (y is the constant 1), one tile per
# income bin, coloured by the bin's average health satisfaction.
ggplot(heatmap_data, aes(x = net_income_bin, y = 1, fill = avg_health_satisfaction)) +
  geom_tile(color = "white") + # White borders separate neighbouring tiles
  scale_fill_gradient2(
    low = "#67001F", # Deep red
    mid = "#F7F7F7", # Neutral white
    high = "#053061", # Deep blue
    # Centre the diverging gradient on the mean of the bin averages
    midpoint = mean(heatmap_data$avg_health_satisfaction, na.rm = TRUE)
  ) +
  labs(
    title = "Average Health Satisfaction by Income Group",
    x = "Net Income (Binned)",
    y = "Health Satisfaction",
    fill = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1, face = "bold", color = "#1E2B4F"),
    axis.text.y = element_blank(), # The y position carries no information in a one-row heatmap
    axis.ticks.y = element_blank(),
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    # Title styling
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5, color = "#1E2B4F", margin = ggplot2::margin(b = 10)),
    # Overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 70, b = 20, l = 50),
    # Legend to the right of the panel. This is the last argument: no trailing
    # comma, because that would pass an empty argument to theme().
    legend.position = "right"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave("income_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
Most of the participants in this dataset have a net income lower than
5000 euros, and for those we see that the higher the income, the higher
the average health, as shown by the positively sloped line. However, for
people with an income above 5000 euros, we see a high variance with
some outliers; therefore, the negative trend cannot be confirmed.
Box Plot of Net Income by Education Level
# Net income per education level: one box per educ code, with the codes
# relabelled on the x-axis. The fill only distinguishes the boxes, so its
# guide is switched off.
main %>%
  ggplot(mapping = aes(factor(educ), net_income, fill = factor(educ))) +
  geom_boxplot(colour = "black", alpha = 0.7, outlier.size = 2, outlier.shape = 16) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  scale_x_discrete(labels = c(
    "1" = "Primary",
    "2" = "Lower Secondary",
    "3" = "Upper Secondary",
    "4" = "Post-Secondary",
    "5" = "Short-Cycle Tertiary",
    "6" = "Bachelor's",
    "7" = "Master's",
    "8" = "Doctoral"
  )) +
  labs(
    fill = "Education Level",
    y = "Net Income",
    x = "Education Level",
    title = "Net Income by Education Level"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "income_educ.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
There is not a significant relationship between education level and
net income.
Age Disparities
# Age distribution: histogram in 5-year bins, scaled to density so that the
# kernel density curve can be overlaid on the same y-axis.
ggplot(data = main, mapping = aes(age)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 5,
    alpha = 0.7,
    colour = "black",
    fill = "#A6CEE3"
  ) +
  geom_density(size = 1, colour = "#1E2B4F") +
  ggtitle("Combined Histogram and Density Plot of Age Distribution") +
  xlab("Age") +
  ylab("Density") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F")
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "age.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Health per age:
# Drop rows without an age and bin the remaining ages into 5-year groups.
# Ages outside 25-55 fall outside every bin and get an NA age_group.
age_data <- main %>%
  filter(!is.na(age)) %>% # Remove NA values in the age column
  mutate(
    age_group = cut(
      age,
      breaks = c(25, 30, 35, 40, 45, 50, 55),
      # Left-closed bins [a, b), so the labels below are literally true:
      # an age of exactly 30 belongs to "[30-35)", not to "[25-30)".
      right = FALSE,
      # With right = FALSE, include.lowest closes the LAST bin on the right,
      # so an age of exactly 55 lands in "[50-55]".
      include.lowest = TRUE,
      labels = c("[25-30)", "[30-35)", "[35-40)", "[40-45)", "[45-50)", "[50-55]")
    )
  )
# Number of observations per age group. The fill only distinguishes the bars,
# so its guide is switched off.
age_data %>%
  ggplot(mapping = aes(age_group, fill = age_group)) +
  geom_bar(colour = "black", alpha = 0.7) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Age Group Distribution") +
  xlab("Age Group") +
  ylab("Count") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold group labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "age2.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Health by 5-year age group as violins; trim = FALSE lets the tails extend
# beyond the observed range of each group.
ggplot(data = age_data, mapping = aes(age_group, health, fill = age_group)) +
  geom_violin(colour = "black", alpha = 0.7, trim = FALSE) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Distribution of Health by 5-Year Age Groups") +
  xlab("Age Group") +
  ylab("Health") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold group labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "age_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Share of health_decline_2yrs values within each age group:
# position = "fill" rescales every bar to 1, so the y-axis is a proportion.
# NOTE(review): colours and labels are matched to the factor levels by
# position; this presumes exactly two levels, "no decline" first - confirm.
ggplot(data = age_data, mapping = aes(age_group, fill = factor(health_decline_2yrs))) +
  geom_bar(colour = "black", alpha = 0.7, position = "fill") +
  scale_fill_manual(
    name = "Health Decline",
    labels = c("No Decline", "Decline"),
    values = c("#A6CEE3", "#1E2B4F")
  ) +
  labs(
    fill = "Health Decline",
    y = "Proportion",
    x = "Age Group",
    subtitle = "Stacked Bar Chart Showing Health Decline Across Age Groups",
    title = "Proportion of Health Decline by Age Group"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level: centred bold title, centred (regular weight) subtitle
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold group labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Legend above the panel
    legend.position = "top",
    legend.title = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    legend.text = element_text(size = 12, colour = "#1E2B4F")
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "age_health_decline.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Share of worr_health_dummy values within each age group:
# position = "fill" rescales every bar to 1, so the y-axis is a proportion.
# NOTE(review): colours and labels are matched to the factor levels by
# position; this presumes exactly two levels, "no worries" first - confirm.
ggplot(data = age_data, mapping = aes(age_group, fill = factor(worr_health_dummy))) +
  geom_bar(colour = "black", alpha = 0.7, position = "fill") +
  scale_fill_manual(
    name = "Health Worry",
    labels = c("No Worries", "Worries"),
    values = c("#A6CEE3", "#1E2B4F")
  ) +
  labs(
    fill = "Health Worry",
    y = "Proportion",
    x = "Age Group",
    title = "Proportion of Health Worries by Age Group"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold group labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Legend above the panel
    legend.position = "top",
    legend.title = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    legend.text = element_text(size = 12, colour = "#1E2B4F")
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "age_worr_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Health satisfaction per age group, one box per group; outliers are drawn
# as solid points.
age_data %>%
  ggplot(mapping = aes(age_group, health_satisfaction, fill = age_group)) +
  geom_boxplot(colour = "black", alpha = 0.7, outlier.size = 2, outlier.shape = 16) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Health Satisfaction by Age Group") +
  xlab("Age Group") +
  ylab("Health Satisfaction") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold group labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "age_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
In our dataset, most participants are around the age of 45. For all
health variables, we notice the same trend: positive subjective health
perceptions are higher among younger individuals, health worries
increase with age, and health satisfaction decreases with age. We also
notice that health decline is stable across all age groups at around
24% and that health does not vary much. Because we expect older
individuals to have a higher income, we should look into the
intersection of age and income and their impact on health.
Scatter Plot of Net Income vs. Age
# Net income against age, one semi-transparent point per observation.
# Title and axis labels are set explicitly, as in every other figure of this
# report; without them the plot.title styling below has nothing to style and
# the axes show the raw column names.
ggplot(main, aes(x = age, y = net_income)) +
  geom_point(alpha = 0.6, color = "#1E2B4F") +
  labs(
    title = "Net Income vs. Age",
    x = "Age",
    y = "Net Income"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Backgrounds
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis styling
    axis.text.x = element_text(size = 12, color = "#1E2B4F"),
    axis.text.y = element_text(size = 12, color = "#1E2B4F"),
    axis.title.x = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", color = "#1E2B4F"),
    # Title styling
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5, color = "#1E2B4F", margin = ggplot2::margin(b = 10)),
    # Overall plot margins
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20)
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave("income_age.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
Net income does increase with age but not significantly.
Marital Status Disparities
# One row per (pid, rel_status) combination: each of the four health measures
# is averaged over that combination's rows, ignoring missing values.
main_rel <- main %>%
  group_by(pid, rel_status) %>%
  summarize(
    across(
      c(health, health_decline_2yrs, worr_health_dummy, health_satisfaction),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )
# Number of rows in main_rel (one per pid/rel_status combination) for each
# relationship status.
main_rel %>%
  ggplot(mapping = aes(factor(rel_status), fill = factor(rel_status))) +
  geom_bar(colour = "black", alpha = 0.7) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Distribution of Relationship Status") +
  xlab("Relationship Status") +
  ylab("Count") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "rel_status.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Mean health per relationship status: stat = "summary" with fun = "mean"
# collapses each status to a single bar whose height is the group mean.
ggplot(
  data = main_rel,
  mapping = aes(factor(rel_status), health, fill = factor(rel_status))
) +
  geom_bar(stat = "summary", fun = "mean", colour = "black", alpha = 0.7) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Average Health by Relationship Status") +
  xlab("Relationship Status") +
  ylab("Average Health") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "rel_status_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Per-person average of health_decline_2yrs (as computed in main_rel),
# shown as one box per relationship status.
ggplot(
  data = main_rel,
  mapping = aes(factor(rel_status), health_decline_2yrs, fill = factor(rel_status))
) +
  geom_boxplot(colour = "black", alpha = 0.7, outlier.size = 2, outlier.shape = 16) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Health Decline by Relationship Status") +
  xlab("Relationship Status") +
  ylab("Health Decline") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "rel_status_health_decline.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Mean of worr_health_dummy per relationship status: stat = "summary" with
# fun = "mean" collapses each status to a single bar at the group mean.
ggplot(
  data = main_rel,
  mapping = aes(factor(rel_status), worr_health_dummy, fill = factor(rel_status))
) +
  geom_bar(stat = "summary", fun = "mean", colour = "black", alpha = 0.7) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Average Health Worries by Relationship Status") +
  xlab("Relationship Status") +
  ylab("Average Health Worries") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level; this figure uses a larger top margin (50) than its siblings
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 50, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "rel_status_worr_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Per-person average health satisfaction (as computed in main_rel),
# shown as one box per relationship status.
ggplot(
  data = main_rel,
  mapping = aes(factor(rel_status), health_satisfaction, fill = factor(rel_status))
) +
  geom_boxplot(colour = "black", alpha = 0.7, outlier.size = 2, outlier.shape = 16) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  ggtitle("Health Satisfaction by Relationship Status") +
  xlab("Relationship Status") +
  ylab("Health Satisfaction") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "rel_status_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
Our dataset contains mostly married individuals, which makes sense as
our main population is around the age of 45. Average health is quite
consistent across all relationship statuses, as are health decline
and health satisfaction. The picture differs for separated individuals
and those with spouses abroad, but the low number of these observations
leads to high variance and does not allow us to draw conclusions.
We notice that single individuals are the least worried about their
health but we expect this to be caused by the younger age of single
participants. We will therefore examine the relationship between marital
status and age.
Box Plot of Age by Relationship Status
# Age per relationship status, one box per rel_status code. Codes "1"-"4"
# are relabelled on the x-axis; no label is supplied for any other code.
main %>%
  ggplot(mapping = aes(factor(rel_status), age, fill = factor(rel_status))) +
  geom_boxplot(colour = "black", alpha = 0.7, outlier.size = 2, outlier.shape = 16) +
  scale_fill_brewer(guide = "none", palette = "RdBu") +
  scale_x_discrete(labels = c(
    "1" = "Single",
    "2" = "Married",
    "3" = "Divorced",
    "4" = "Widowed"
  )) +
  ggtitle("Age by Relationship Status") +
  xlab("Relationship Status") +
  ylab("Age") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: tilted bold category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "rel_status_age.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
As expected, the single group is younger but this is not significant
as the variance is quite high.
Disparities by Place of Birth
# One row per (pid, germborn) combination: each of the four health measures
# is averaged over that combination's rows, ignoring missing values.
main_germborn <- main %>%
  group_by(pid, germborn) %>%
  summarize(
    across(
      c(health, health_decline_2yrs, worr_health_dummy, health_satisfaction),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )
# Number of rows in main_germborn (one per pid/germborn combination) for each
# germborn value; the codes "0"/"1" are relabelled on the x-axis.
main_germborn %>%
  ggplot(mapping = aes(factor(germborn), fill = factor(germborn))) +
  geom_bar(colour = "black", alpha = 0.7) +
  scale_fill_manual(guide = "none", values = c("#A6CEE3", "#1E2B4F")) +
  scale_x_discrete(labels = c(
    "0" = "Born outside Germany",
    "1" = "Born in Germany"
  )) +
  ggtitle("Distribution of German-Born Individuals") +
  xlab("Migration Status") +
  ylab("Count") +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: bold, untilted category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F")
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "germborn.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Per-person average health (as computed in main_germborn) by germborn value,
# drawn as violins; trim = FALSE lets the tails extend beyond the observed range.
# The fill scale still carries a name and labels, but the legend is hidden
# by legend.position = "none" below.
ggplot(
  data = main_germborn,
  mapping = aes(factor(germborn), health, fill = factor(germborn))
) +
  geom_violin(colour = "black", alpha = 0.7, trim = FALSE) +
  scale_x_discrete(labels = c(
    "0" = "Born Outside of Germany",
    "1" = "Born in Germany"
  )) +
  scale_fill_manual(
    name = "German-Born Status",
    labels = c("Born Outside of Germany", "Born in Germany"),
    values = c("#A6CEE3", "#1E2B4F")
  ) +
  labs(
    y = "Health",
    x = "",
    title = "Distribution of Health by Migration Status"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plot level
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    plot.background = element_rect(fill = "white", colour = NA),
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Panel: white, no grid
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: bold, untilted category labels on x
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # No legend
    legend.position = "none"
  )

# Save the plot that was just drawn (size taken from the current device)
ggsave(filename = "germborn_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Graph for health_decline_2yrs per germborn:
# Box plot of `health_decline_2yrs` for respondents born outside of Germany
# (germborn = 0) versus born in Germany (germborn = 1).
ggplot(
  data = main_germborn,
  mapping = aes(
    x = factor(germborn),
    y = health_decline_2yrs,
    fill = factor(germborn)
  )
) +
  # Semi-transparent boxes with black outlines; outliers drawn as solid dots
  geom_boxplot(
    alpha = 0.7, outlier.shape = 16, outlier.size = 2, colour = "black"
  ) +
  # Colours are keyed by factor level (like the x-axis labels below) so they
  # cannot be swapped when a level is absent. The legend is dropped because
  # the x-axis already names both groups.
  scale_fill_manual(
    values = c("0" = "#A6CEE3", "1" = "#1E2B4F"), # Light blue, dark blue
    guide = "none"
  ) +
  scale_x_discrete(
    labels = c(
      "0" = "Born Outside of Germany",
      "1" = "Born in Germany"
    )
  ) +
  labs(
    title = "Health Decline by Migration Status",
    x = "",
    y = "Health Decline"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plain white canvas without grid lines
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark blue axis text; the group labels on the x-axis are bold and centred
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", hjust = 0.5
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    # Centred bold title with a small gap underneath
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the plot, mostly on the right-hand side
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    legend.position = "none"
  )

# Save the plot built above at 300 dpi.
ggsave("germborn_health_decline.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Graph for worr_health_dummy per germborn:
# Overlaid density curves of `worr_health_dummy`, one per germborn group.
# NOTE(review): the variable name suggests a 0/1 indicator; if so, a kernel
# density is hard to interpret and a bar chart of proportions may read
# better -- confirm before publishing.
ggplot(
  data = main_germborn,
  mapping = aes(x = worr_health_dummy, fill = factor(germborn))
) +
  # Semi-transparent fills so both curves stay visible where they overlap
  geom_density(alpha = 0.5, colour = "black") +
  # Colours and legend labels are keyed by factor level, so a missing level
  # cannot shift a colour or a label onto the wrong group.
  scale_fill_manual(
    values = c("0" = "#F00000", "1" = "#1E2B4F"),
    labels = c(
      "0" = "Born Outside of Germany",
      "1" = "Born in Germany"
    ),
    name = "Migration Status"
  ) +
  labs(
    title = "Density of Health Worries by Migration Status",
    x = "Health Worries",
    y = "Density"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plain white canvas without grid lines
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark blue axis text and bold axis titles
    axis.text.x = element_text(size = 12, colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    # Centred bold title with a small gap underneath
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the plot, mostly on the right-hand side
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Legend sits above the panel and uses the same dark blue as the axes
    legend.position = "top",
    legend.text = element_text(size = 12, colour = "#1E2B4F"),
    legend.title = element_text(size = 14, face = "bold", colour = "#1E2B4F")
  )

# Save the plot built above at 300 dpi.
ggsave("germborn_worr_health.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# Graph for health_satisfaction per germborn:
# Box plot of `health_satisfaction` for respondents born outside of Germany
# (germborn = 0) versus born in Germany (germborn = 1).
ggplot(
  data = main_germborn,
  mapping = aes(
    x = factor(germborn),
    y = health_satisfaction,
    fill = factor(germborn)
  )
) +
  # Semi-transparent boxes with black outlines; outliers drawn as solid dots
  geom_boxplot(
    alpha = 0.7, outlier.shape = 16, outlier.size = 2, colour = "black"
  ) +
  # Colours are keyed by factor level (like the x-axis labels below) so they
  # cannot be swapped when a level is absent. The legend is dropped because
  # the x-axis already names both groups.
  scale_fill_manual(
    values = c("0" = "#A6CEE3", "1" = "#1E2B4F"), # Light blue, dark blue
    guide = "none"
  ) +
  scale_x_discrete(
    labels = c(
      "0" = "Born Outside of Germany",
      "1" = "Born in Germany"
    )
  ) +
  labs(
    title = "Health Satisfaction by Migration Status",
    x = "",
    y = "Health Satisfaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Plain white canvas without grid lines
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Dark blue axis text; the group labels on the x-axis are bold and centred
    axis.text.x = element_text(
      size = 12, face = "bold", colour = "#1E2B4F", hjust = 0.5
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    # Centred bold title with a small gap underneath
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Extra room around the plot, mostly on the right-hand side
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    legend.position = "none"
  )

# Save the plot built above at 300 dpi.
ggsave("germborn_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
Our dataset mostly contains individuals born in Germany, which makes
sense as it is the SOEP data. Health distributions are quite similar
for people born in Germany and for people born outside of Germany. We
do notice that individuals born outside of Germany have smaller health
declines but also higher health worries and lower health satisfaction.
The small differences and the contradictory findings, combined with the
fact that we only have a small sample of individuals born outside of
Germany, do not allow us to draw conclusions.
Relationship between subjective and objective health
# worr_health_categorical per health_satisfaction:
# One box of `health_satisfaction` per health-worry category.
ggplot(
  data = main,
  mapping = aes(
    x = factor(worr_health_categorical),
    y = health_satisfaction,
    fill = factor(worr_health_categorical)
  )
) +
  # Semi-transparent boxes with black outlines; outliers drawn as solid dots
  geom_boxplot(
    alpha = 0.7, outlier.shape = 16, outlier.size = 2, colour = "black"
  ) +
  labs(
    title = "Health Satisfaction by Health Worry Categories",
    x = "Health Worry Category",
    y = "Health Satisfaction"
  ) +
  # Replace the numeric category codes with readable labels
  scale_x_discrete(
    labels = c(
      "1" = "Does Not Worry",
      "2" = "Worries a Little",
      "3" = "Worries a Lot"
    )
  ) +
  # Red-to-blue Brewer palette; no legend since the x-axis names the groups
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  theme_minimal(base_size = 14) +
  theme(
    # Centred bold title with a small gap underneath
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Bold axis titles in dark blue
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    # Category labels are tilted 45 degrees
    axis.text.x = element_text(
      size = 12, angle = 45, hjust = 1, face = "bold", colour = "#1E2B4F"
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Plain white canvas without grid lines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background = element_rect(fill = "white", colour = NA),
    # Extra room around the plot, mostly on the right-hand side
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    legend.position = "none"
  )

# Save the plot built above at 300 dpi.
ggsave(
  filename = "worr_health_health_satisfaction.png",
  plot = last_plot(),
  dpi = 300
)
## Saving 7 x 5 in image
# health_satisfaction per life_satisfaction:
# 2D binned heatmap: the fill shade encodes how many observations fall into
# each (life_satisfaction, health_satisfaction) cell.
# NOTE(review): if both scores are integer-valued, bins = 30 may leave many
# cells empty -- confirm the bin count is intended.
ggplot(
  data = main,
  mapping = aes(x = life_satisfaction, y = health_satisfaction)
) +
  geom_bin2d(bins = 30) +
  labs(
    title = "Health Satisfaction vs. Life Satisfaction",
    x = "Life Satisfaction",
    y = "Health Satisfaction"
  ) +
  # Light blue for sparse cells through dark blue for dense cells
  scale_fill_gradient(low = "#A6CEE3", high = "#1E2B4F", name = "Count") +
  theme_minimal(base_size = 14) +
  theme(
    # Centred bold title with a small gap underneath
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Bold axis titles in dark blue
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.text.x = element_text(size = 12, face = "bold", colour = "#1E2B4F"),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Plain white canvas without grid lines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background = element_rect(fill = "white", colour = NA),
    # Extra room around the plot, mostly on the right-hand side
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    # Count legend to the right of the panel, styled like the axes
    legend.position = "right",
    legend.text = element_text(size = 12, colour = "#1E2B4F"),
    legend.title = element_text(size = 14, face = "bold", colour = "#1E2B4F")
  )

# Save the plot built above at 300 dpi.
ggsave(filename = "life_health_satisfaction.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
# health per BMI graph:
# One violin of `health` per BMI category.
ggplot(
  data = main,
  mapping = aes(
    x = factor(bmi_categorical),
    y = health,
    fill = factor(bmi_categorical)
  )
) +
  # trim = FALSE keeps the violin tails instead of cutting them at the data range
  geom_violin(trim = FALSE, alpha = 0.7, colour = "black") +
  labs(
    title = "Health Distribution by BMI Category",
    x = "BMI Category",
    y = "Health"
  ) +
  # Replace the numeric category codes with readable labels
  scale_x_discrete(
    labels = c(
      "1" = "Underweight",
      "2" = "Normal Weight",
      "3" = "Overweight",
      "4" = "Obese"
    )
  ) +
  # Red-to-blue Brewer palette; no legend since the x-axis names the groups
  scale_fill_brewer(palette = "RdBu", guide = "none") +
  theme_minimal(base_size = 14) +
  theme(
    # Centred bold title with a small gap underneath
    plot.title = element_text(
      face = "bold", size = 16, hjust = 0.5, colour = "#1E2B4F",
      margin = ggplot2::margin(b = 10)
    ),
    # Bold axis titles in dark blue
    axis.title.x = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    axis.title.y = element_text(size = 14, face = "bold", colour = "#1E2B4F"),
    # Category labels are tilted 45 degrees
    axis.text.x = element_text(
      size = 12, angle = 45, hjust = 1, face = "bold", colour = "#1E2B4F"
    ),
    axis.text.y = element_text(size = 12, colour = "#1E2B4F"),
    # Plain white canvas without grid lines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background = element_rect(fill = "white", colour = NA),
    # Extra room around the plot, mostly on the right-hand side
    plot.margin = ggplot2::margin(t = 20, r = 100, b = 20, l = 20),
    legend.position = "none"
  )

# Save the plot built above at 300 dpi.
ggsave(filename = "health_bmi.png", plot = last_plot(), dpi = 300)
## Saving 7 x 5 in image
Further analyses confirm our expectations: the higher the health
satisfaction, the less individuals worry about their health, and vice
versa.
Similarly, the higher the life satisfaction, the higher the health
satisfaction: we observe an increasing linear relationship between the
two.
Analyzing BMI, we also see that normal-weight and overweight individuals
show higher health. However, due to the small number of observations of
underweight and obese individuals, we cannot draw conclusions.